package com.jjh.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;

import org.htmlparser.Parser;
import org.htmlparser.visitors.HtmlPage;

/**
 * Reads basic information (location, title, raw text) from a single HTML document.
 *
 * <p>The document is identified by the path string passed to the constructor. The title
 * is extracted with the HTMLParser library; the content is read directly from the file.
 */
public class HtmlDocParser {

	// Location of the HTML document; always overwritten by the constructor.
	String path = "D:\\jjh\\index.html";
	// File handle for the same location, used by getContent().
	File html = null;
	
	/**
	 * Creates a parser for the HTML document at the given location.
	 *
	 * @param htmlPath location of the HTML document; it is handed both to
	 *                 {@link File} and to the HTMLParser {@code Parser}
	 */
	public HtmlDocParser(String htmlPath) {
		this.path = htmlPath;
		html = new File(htmlPath);
	}

	/**
	 * Returns the location of the page exactly as it was given to the constructor.
	 *
	 * @return the document path
	 */
	public String getPath() {
		return this.path;
	}

	/**
	 * Returns the title of the page.
	 *
	 * @return the title reported by {@code HtmlPage.getTitle()}, or an empty string
	 *         if parsing fails for any reason (the failure is printed to stderr)
	 */
	public String getTitle() {
		try {
			Parser parser = new Parser();
			parser.setResource(path);
			
			// HtmlPage is a visitor that collects the title while walking the nodes.
			HtmlPage htmlPage = new HtmlPage(parser);
			parser.visitAllNodesWith(htmlPage);
			return htmlPage.getTitle();
		} catch (Exception e) {
			e.printStackTrace();
		}
		
		return "";
	}

	/**
	 * Returns the text of the file, decoded with the platform default charset.
	 *
	 * <p>Lines are concatenated without their line terminators. Reading is best-effort:
	 * if the file is missing or an I/O error occurs, the error is printed to stderr and
	 * whatever was read so far (possibly an empty string) is returned.
	 *
	 * @return the concatenated lines of the file
	 */
	public String getContent() {
		StringBuilder content = new StringBuilder();
		BufferedReader reader = null;
		
		try {
			reader = new BufferedReader(new InputStreamReader(new FileInputStream(html)));
			
			// Read each line exactly once; calling readLine() in both the loop
			// condition and the body would silently drop every other line.
			String line;
			while ((line = reader.readLine()) != null) {
				content.append(line);
			}
		} catch (IOException e) {
			// Covers FileNotFoundException as well; keep the best-effort contract.
			e.printStackTrace();
		} finally {
			// Always release the file handle, even if reading failed part-way.
			if (reader != null) {
				try {
					reader.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
		
		return content.toString();
	}

}
